#------------------------------------------------------------------------------#
# Business & Politics
# Placebo Test Data Preparation
# Author: Jeff Allen
# Last updated: November 19, 2020
#
# Table of Contents: 
# 1) Import and Formatting
# 2) Market Model and Abnormal Returns
#  2.1) Huawei: Market Model and Abnormal Returns
#  2.2) ZTE: Market Model and Abnormal Returns
#  2.3) Surveillance: Market Model and Abnormal Returns
# 3) Combine and Export
#------------------------------------------------------------------------------#

## SET WORKING DIRECTORY

library(reshape2)
library(DescTools)

## Turn off scientific notation
options(scipen = 999)

#------------------------------------------------------------------------------#
# (1) Import and formatting
#------------------------------------------------------------------------------#

## Import CRSP Data
# These are the returns of the U.S. suppliers of each sanctioned firm
# They are not the returns of the Chinese companies
zte.df <- read.csv("zte_crsp.csv", stringsAsFactors = FALSE)
huawei.df <- read.csv("huawei_crsp.csv", stringsAsFactors = FALSE)
surveillance.df <- read.csv("surveillance_crsp.csv", stringsAsFactors = FALSE)

## Check structures
str(zte.df) # Returns are character
str(huawei.df) # Returns are character
str(surveillance.df) # Returns are numeric

## Convert and inspect ZTE Returns
zte.df$RET <- as.numeric(zte.df$RET)
zte.na <- zte.df[which(is.na(zte.df$RET)),] 
# LITE is NA (08/04/2015)
rm(zte.na)

## Convert and inspect Huawei Returns
huawei.df$RET <- as.numeric(huawei.df$RET)
huawei.na <- huawei.df[which(is.na(huawei.df$RET)),] 
# AQ is NA (11/03/2017)
rm(huawei.na)

## Summarize
summary(zte.df)
summary(huawei.df)
summary(surveillance.df)

## Multiply returns by 100
zte.df$RET <- zte.df$RET * 100
huawei.df$RET <- huawei.df$RET * 100
surveillance.df$RET <- surveillance.df$RET * 100

## Convert dates
zte.df$date <- as.Date(zte.df$date, format = "%m/%d/%Y")
huawei.df$date <- as.Date(huawei.df$date, format = "%m/%d/%Y")
surveillance.df$date <- as.Date(surveillance.df$date, format = "%m/%d/%Y")

## Reshape to wide
zte.df.w <- dcast(zte.df, date ~ TICKER, value.var = "RET")
huawei.df.w <- dcast(huawei.df, date ~ TICKER, value.var = "RET")
surveillance.df.w <- dcast(surveillance.df, date ~ TICKER, value.var = "RET")

##-- Missing Data --##

## Summarize: which are missing? (identify tickers)
summary(zte.df.w) 
# JDSU, LITE, ON, ONNN, QRVO, RFMD, VIAV 
summary(huawei.df.w) 
# AQ, SSC, IDEX 
summary(surveillance.df.w) 
# None

#--- ZTE adjustments ---# 
## Note: Checked the nature of these in CRSP
# VIAV used to be JDSU
zte.df.w$VIAV <- with(zte.df.w, ifelse(is.na(VIAV), JDSU, VIAV))
# ON used to be ONNN
zte.df.w$ON <- with(zte.df.w, ifelse(is.na(ON), ONNN, ON))
# QRVO used to be RFMD
zte.df.w$QRVO <- with(zte.df.w, ifelse(is.na(QRVO), RFMD, QRVO))
# Drop JDSU, ONNN, RFMD, and LITE
# Why LITE? First reported date is within estimation window
zte.df.w <- zte.df.w[-c(30, 33, 48, 53)]

#--- Huawei adjustments ---# 
## Note: Checked the nature of these in CRSP
# Ideanomics (IDEX) used to be Seven Stars Cloud (SSC)
huawei.df.w$IDEX <- with(huawei.df.w, ifelse(is.na(IDEX), SSC, IDEX))
# AQ is missing days in estimation period, so needs to be dropped
# Drop AQ and SSC
huawei.df.w <- huawei.df.w[-c(9, 68)] 

## Clean up
rm(huawei.df, surveillance.df, zte.df)

#------------------------------------------------------------------------------#
# (2) Market Model and Abnormal Returns
#------------------------------------------------------------------------------#

##-- Preliminary Steps --##

## Import Nasdaq
nasdaq.df <- read.csv("nasdaq.csv", stringsAsFactors = FALSE)
nasdaq.df$date <- as.Date(nasdaq.df$date, format = "%m/%d/%Y")

## Identify window length
wind.l <- 5

## Identify estimation length
est.l <- 253

#------------------------------------------------------------------------------#
# (2.1) Huawei: Market Model and Abnormal Returns
#------------------------------------------------------------------------------#

##-- Huawei: Formatting --##

## Re-assign df
df <- huawei.df.w

## Merge nasdaq
df <- merge(df, nasdaq.df, all.x = TRUE)

## Add identifier to column names and replace
c.names <- colnames(df)[-1]
c.names <- paste0(c.names, ".H") # Huawei (H)
colnames(df)[-1] <- c.names
rm(c.names)

## Identify t = 0
t.zero <- which(df$date == "2018-09-26")

## Create time identifiers and range
lb.e <- t.zero - (wind.l + est.l) # Lower bound (estimation)
ub.w <- t.zero + wind.l # Upper bound (window)
range.w <- lb.e:ub.w

## Create window
window.df <- df[range.w,]

## Re-name rows
rownames(window.df) <- 1:nrow(window.df)

## Check NAs
summary(window.df)

## Add time var
window.df$time <- ((wind.l + est.l)*-1):wind.l

## Inspect over event window
window.df[(nrow(window.df)-(2*wind.l)):nrow(window.df),
          c(1,ncol(window.df))] # Looks good

## Drop the date column
window.df <- window.df[-1]

## Clean up
rm(df, lb.e, range.w, t.zero, ub.w)

##-- Huawei: Estimate alpha and beta --##

## Set up data frame
market.df <- data.frame(
  firm = colnames(window.df)[-c(ncol(window.df), ncol(window.df) - 1)],
  alpha = NA,
  beta = NA,
  stringsAsFactors = FALSE
)

str(market.df)

## Ensure names align
identical(market.df$firm,
          colnames(window.df)[-c(ncol(window.df), ncol(window.df) - 1)])

## Loop over firms, generating alpha and beta
n <- length(market.df$firm)

for (i in 1:n){
  fit <- lm(window.df[1:est.l,i] ~ nasdaq.H, data = window.df[1:est.l,])
  market.df[i,2] <- fit$coefficients[1]
  market.df[i,3] <- fit$coefficients[2]
}

rm(i, fit)

## Check for consistency
(fit.1 <- lm(window.df[1:est.l,1] ~ nasdaq.H, data = window.df[1:est.l,]))
(fit.5 <- lm(window.df[1:est.l,5] ~ nasdaq.H, data = window.df[1:est.l,]))
(fit.10 <- lm(window.df[1:est.l,10] ~ nasdaq.H, data = window.df[1:est.l,]))
rm(fit.1, fit.5, fit.10)

##-- Huawei: Combine Data --##

## Melt data
firms.df <- melt(window.df, id.vars = "time")
str(firms.df)
firms.df$variable <- as.character(firms.df$variable)

## Extract nasdaq and eliminate from firms
nasdaq.i <- which(firms.df$variable == "nasdaq.H")
n.df <- firms.df[nasdaq.i,]
firms.df <- firms.df[-nasdaq.i,]

# Rename nasdaq & Re-Organize
colnames(n.df)[3] <- "nasdaq"
n.df <- n.df[-2]
rownames(n.df) <- 1:nrow(n.df)

##-- Huaqei: Create elements for BMP stat --##
# See: BMP (1991, Appendix, 269)
n.est.df <- n.df[1:est.l,]
T.est <- nrow(n.est.df) # "T.i"
Rm.bar <- mean(n.est.df$nasdaq)
n.est.df$sqd <- (n.est.df$nasdaq - Rm.bar)^2 # Squared deviations R_mt from Rm_bar
Rm.ssd <- sum(n.est.df$sqd) # Sum of squared deviations R_mt from Rm_bar
rm(n.est.df)

## Rename
colnames(firms.df)[2] <- "firm"

## Merge with alpha and beta
firms.df <- merge(firms.df, market.df, all.x = TRUE)

## Merge nasdaq
firms.df <- merge(firms.df, n.df, all.x = TRUE)

## Clean up
rm(market.df, n.df, window.df, n, nasdaq.i)

##-- Huawei: Generate returns --##

## Risk-adjusted expected return
firms.df$er <- with(firms.df, alpha + (beta * nasdaq))

## Risk-adjusted abnormal return
firms.df$ar <- with(firms.df, value - er)

## SD of AR during estimation period (for BMP statistic)
sd.ar.est <- aggregate(ar ~ firm, data = firms.df[firms.df$time < -wind.l,], sd)
colnames(sd.ar.est)[2] <- "sd.ar.est"

## Bind SD
firms.df <- merge(firms.df, sd.ar.est, all.x = TRUE)

## Sort
index <- with(firms.df, order(firm, time))
firms.df <- firms.df[index,]
rownames(firms.df) <- 1:nrow(firms.df)
rm(index)

## Forecast error adjustment factor (BMP statistic)
firms.df$fe_adj <-
  with(firms.df, sqrt(1 + (1/T.est) + (nasdaq - Rm.bar)^2 / Rm.ssd))

## Standardized residual (for BMP statistic)
# Apply the forecast error to observations in the event window
firms.df$sr <- 
  with(firms.df,
       ifelse(time < -wind.l, ar / sd.ar.est, ar / (sd.ar.est * fe_adj)))

##-- Huawei: Develop data frames --##

## Add indicator for event
firms.df$event <- "Huawei"

## Create data frame
huawei.df <- firms.df[c(1,2,8,11,12)]

## Clean up
rm(firms.df, Rm.bar, Rm.ssd, T.est, sd.ar.est, huawei.df.w)

#------------------------------------------------------------------------------#
# (2.2) ZTE: Market Model and Abnormal Returns
#------------------------------------------------------------------------------#

##-- ZTE: Formatting --##

## Re-assign df
df <- zte.df.w

## Merge nasdaq
df <- merge(df, nasdaq.df, all.x = TRUE)

## Add identifier to column names and replace
c.names <- colnames(df)[-1]
c.names <- paste0(c.names, ".Z") # ZTE (Z)
colnames(df)[-1] <- c.names
rm(c.names)

## Identify t = 0
t.zero <- which(df$date == "2016-02-23")

## Create time identifiers and range
lb.e <- t.zero - (wind.l + est.l) # Lower bound (estimation)
ub.w <- t.zero + wind.l # Upper bound (window)
range.w <- lb.e:ub.w

## Create window
window.df <- df[range.w,]

## Re-name rows
rownames(window.df) <- 1:nrow(window.df)

## Check NAs
summary(window.df)

## Add time var
window.df$time <- ((wind.l + est.l)*-1):wind.l

## Inspect over event window
window.df[(nrow(window.df)-(2*wind.l)):nrow(window.df),
          c(1,ncol(window.df))] # Looks good

## Drop the date column
window.df <- window.df[-1]

## Clean up
rm(df, lb.e, range.w, t.zero, ub.w)

##-- ZTE: Estimate alpha and beta --##

## Set up data frame
market.df <- data.frame(
  firm = colnames(window.df)[-c(ncol(window.df), ncol(window.df) - 1)],
  alpha = NA,
  beta = NA,
  stringsAsFactors = FALSE
)

str(market.df)

## Ensure names align
identical(market.df$firm,
          colnames(window.df)[-c(ncol(window.df), ncol(window.df) - 1)])

## Loop over firms, generating alpha and beta
n <- length(market.df$firm)

for (i in 1:n){
  fit <- lm(window.df[1:est.l,i] ~ nasdaq.Z, data = window.df[1:est.l,])
  market.df[i,2] <- fit$coefficients[1]
  market.df[i,3] <- fit$coefficients[2]
}

rm(i, fit)

## Check for consistency
(fit.1 <- lm(window.df[1:est.l,1] ~ nasdaq.Z, data = window.df[1:est.l,]))
(fit.5 <- lm(window.df[1:est.l,5] ~ nasdaq.Z, data = window.df[1:est.l,]))
(fit.10 <- lm(window.df[1:est.l,10] ~ nasdaq.Z, data = window.df[1:est.l,]))
rm(fit.1, fit.5, fit.10)

##-- ZTE: Combine Data --##

## Melt data
firms.df <- melt(window.df, id.vars = "time")
str(firms.df)
firms.df$variable <- as.character(firms.df$variable)

## Extract nasdaq and eliminate from firms
nasdaq.i <- which(firms.df$variable == "nasdaq.Z")
n.df <- firms.df[nasdaq.i,]
firms.df <- firms.df[-nasdaq.i,]

# Rename nasdaq & Re-Organize
colnames(n.df)[3] <- "nasdaq"
n.df <- n.df[-2]
rownames(n.df) <- 1:nrow(n.df)

##-- ZTE: Create elements for BMP stat --##
# See: BMP (1991, Appendix, 269)
n.est.df <- n.df[1:est.l,]
T.est <- nrow(n.est.df) # "T.i"
Rm.bar <- mean(n.est.df$nasdaq)
n.est.df$sqd <- (n.est.df$nasdaq - Rm.bar)^2 # Squared deviations R_mt from Rm_bar
Rm.ssd <- sum(n.est.df$sqd) # Sum of squared deviations R_mt from Rm_bar
rm(n.est.df)

## Rename
colnames(firms.df)[2] <- "firm"

## Merge with alpha and beta
firms.df <- merge(firms.df, market.df, all.x = TRUE)

## Merge nasdaq
firms.df <- merge(firms.df, n.df, all.x = TRUE)

## Clean up
rm(market.df, n.df, window.df, n, nasdaq.i)

##-- ZTE: Generate returns --##

## Risk-adjusted expected return
firms.df$er <- with(firms.df, alpha + (beta * nasdaq))

## Risk-adjusted abnormal return
firms.df$ar <- with(firms.df, value - er)

## SD of AR during estimation period (for BMP statistic)
sd.ar.est <- aggregate(ar ~ firm, data = firms.df[firms.df$time < -wind.l,], sd)
colnames(sd.ar.est)[2] <- "sd.ar.est"

## Bind SD
firms.df <- merge(firms.df, sd.ar.est, all.x = TRUE)

## Sort
index <- with(firms.df, order(firm, time))
firms.df <- firms.df[index,]
rownames(firms.df) <- 1:nrow(firms.df)
rm(index)

## Forecast error adjustment factor (BMP statistic)
firms.df$fe_adj <-
  with(firms.df, sqrt(1 + (1/T.est) + (nasdaq - Rm.bar)^2 / Rm.ssd))

## Standardized residual (for BMP statistic)
# Apply the forecast error to observations in the event window
firms.df$sr <- 
  with(firms.df,
       ifelse(time < -wind.l, ar / sd.ar.est, ar / (sd.ar.est * fe_adj)))

##-- ZTE: Develop data frames --##

## Add indicator for event
firms.df$event <- "ZTE"

## Create data frame
zte.df <- firms.df[c(1,2,8,11,12)]

## Clean up
rm(firms.df, Rm.bar, Rm.ssd, T.est, sd.ar.est, zte.df.w)

#------------------------------------------------------------------------------#
# (2.3) Surveillance: Market Model and Abnormal Returns
#------------------------------------------------------------------------------#

##-- Surveillance: Formatting --##

## Re-assign df
df <- surveillance.df.w

## Merge nasdaq
df <- merge(df, nasdaq.df, all.x = TRUE)

## Add identifier to column names and replace
c.names <- colnames(df)[-1]
c.names <- paste0(c.names, ".S") # Surveillance (S)
colnames(df)[-1] <- c.names
rm(c.names)

## Identify t = 0
t.zero <- which(df$date == "2019-11-13")

## Create time identifiers and range
lb.e <- t.zero - (wind.l + est.l) # Lower bound (estimation)
ub.w <- t.zero + wind.l # Upper bound (window)
range.w <- lb.e:ub.w

## Create window
window.df <- df[range.w,]

## Re-name rows
rownames(window.df) <- 1:nrow(window.df)

## Check NAs
summary(window.df)

## Add time var
window.df$time <- ((wind.l + est.l)*-1):wind.l

## Inspect over event window
window.df[(nrow(window.df)-(2*wind.l)):nrow(window.df),
          c(1,ncol(window.df))] # Looks good

## Drop the date column
window.df <- window.df[-1]

## Clean up
rm(df, lb.e, range.w, t.zero, ub.w)

##-- Surveillance: Estimate alpha and beta --##

## Set up data frame
market.df <- data.frame(
  firm = colnames(window.df)[-c(ncol(window.df), ncol(window.df) - 1)],
  alpha = NA,
  beta = NA,
  stringsAsFactors = FALSE
)

str(market.df)

## Ensure names align
identical(market.df$firm,
          colnames(window.df)[-c(ncol(window.df), ncol(window.df) - 1)])

## Loop over firms, generating alpha and beta
n <- length(market.df$firm)

for (i in 1:n){
  fit <- lm(window.df[1:est.l,i] ~ nasdaq.S, data = window.df[1:est.l,])
  market.df[i,2] <- fit$coefficients[1]
  market.df[i,3] <- fit$coefficients[2]
}

rm(i, fit)

## Check for consistency
(fit.1 <- lm(window.df[1:est.l,1] ~ nasdaq.S, data = window.df[1:est.l,]))
(fit.5 <- lm(window.df[1:est.l,5] ~ nasdaq.S, data = window.df[1:est.l,]))
(fit.10 <- lm(window.df[1:est.l,10] ~ nasdaq.S, data = window.df[1:est.l,]))
rm(fit.1, fit.5, fit.10)

##-- Surveillance: Combine Data --##

## Melt data
firms.df <- melt(window.df, id.vars = "time")
str(firms.df)
firms.df$variable <- as.character(firms.df$variable)

## Extract nasdaq and eliminate from firms
nasdaq.i <- which(firms.df$variable == "nasdaq.S")
n.df <- firms.df[nasdaq.i,]
firms.df <- firms.df[-nasdaq.i,]

# Rename nasdaq & Re-Organize
colnames(n.df)[3] <- "nasdaq"
n.df <- n.df[-2]
rownames(n.df) <- 1:nrow(n.df)

##-- Surveillance: Create elements for BMP stat --##
# See: BMP (1991, Appendix, 269)
n.est.df <- n.df[1:est.l,]
T.est <- nrow(n.est.df) # "T.i"
Rm.bar <- mean(n.est.df$nasdaq)
n.est.df$sqd <- (n.est.df$nasdaq - Rm.bar)^2 # Squared deviations R_mt from Rm_bar
Rm.ssd <- sum(n.est.df$sqd) # Sum of squared deviations R_mt from Rm_bar
rm(n.est.df)

## Rename
colnames(firms.df)[2] <- "firm"

## Merge with alpha and beta
firms.df <- merge(firms.df, market.df, all.x = TRUE)

## Merge nasdaq
firms.df <- merge(firms.df, n.df, all.x = TRUE)

## Clean up
rm(market.df, n.df, window.df, n, nasdaq.i)

##-- Surveillance: Generate returns --##

## Risk-adjusted expected return
firms.df$er <- with(firms.df, alpha + (beta * nasdaq))

## Risk-adjusted abnormal return
firms.df$ar <- with(firms.df, value - er)

## SD of AR during estimation period (for BMP statistic)
sd.ar.est <- aggregate(ar ~ firm, data = firms.df[firms.df$time < -wind.l,], sd)
colnames(sd.ar.est)[2] <- "sd.ar.est"

## Bind SD
firms.df <- merge(firms.df, sd.ar.est, all.x = TRUE)

## Sort
index <- with(firms.df, order(firm, time))
firms.df <- firms.df[index,]
rownames(firms.df) <- 1:nrow(firms.df)
rm(index)

## Forecast error adjustment factor (BMP statistic)
firms.df$fe_adj <-
  with(firms.df, sqrt(1 + (1/T.est) + (nasdaq - Rm.bar)^2 / Rm.ssd))

## Standardized residual (for BMP statistic)
# Apply the forecast error to observations in the event window
firms.df$sr <- 
  with(firms.df,
       ifelse(time < -wind.l, ar / sd.ar.est, ar / (sd.ar.est * fe_adj)))

##-- Surveillance: Develop data frames --##

## Add indicator for event
firms.df$event <- "Surveillance"

## Create data frame
surveillance.df <- firms.df[c(1,2,8,11,12)]

## Clean up
rm(firms.df, Rm.bar, Rm.ssd, T.est, sd.ar.est, surveillance.df.w,
   est.l, wind.l, nasdaq.df)

#------------------------------------------------------------------#
# 4) Combine and export data
#------------------------------------------------------------------#

firms.df <- rbind(huawei.df, zte.df, surveillance.df)
write.csv(firms.df, "placebo_data.csv", row.names = FALSE)

rm(list=ls())
